# Load the lizard observations; the path is assembled with here()
lizards <- read_csv(file = here("data_tidy", "lizards.csv"))
## Rows: 1628 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): date, scientific_name, common_name, zone, site, plot, spp, sex, rc...
## dbl (6): pit, toe_num, sv_length, total_length, weight, pc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# type `here()` in the console to determine where "here" is
Histogram (really bad) of lizard weights
# A histogram maps a single variable, so only `x` is supplied
lizards %>%
  ggplot(mapping = aes(x = weight)) +
  geom_histogram(
    linetype = "dotted",
    size = 2,
    color = "blue",
    fill = "orange"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Color refers to points and lines. Fill refers to polygons
# Size of polygon refers to the width of the border
Scatter Plot
# Scatter plot with total_length on the x-axis and weight on the y-axis.
# Each shape number (0-25) corresponds to a different shape; some shapes have
# a fill color. ASCII codes can be used instead.
ggplot(lizards, aes(total_length, weight)) +
  geom_point(
    alpha = 0.7, # transparency: 0 = completely transparent, 1 = opaque
    size = 3,
    color = "red",
    fill = "yellow",
    shape = 22
  )
Bubble plot (take a point and change its size based on a variable)
# Bubble plot: the point color changes with common_name and the point size
# varies with total_length.
# DISCLAIMER: BAD IDEA
lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point(
    mapping = aes(color = common_name, size = total_length),
    fill = "black",
    shape = 22
  ) +
  theme_light()
Facet grid vs. facet wrap #### Facet wrap is a series of panels laid out in a row that wraps onto the next row when space runs out
lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = common_name)) +
  theme_light() +
  # `~` means "by": one panel per common_name
  facet_wrap(facets = ~common_name, ncol = 4)
# `ncol` changes the number of columns; the first row fills up first, then the next row
Facet Grid
ggplot(lizards, aes(total_length, weight)) +
  geom_point(mapping = aes(color = common_name)) +
  # formula is rows ~ columns
  facet_grid(sex ~ tail)
# a panel is populated with data at each row/column intersection
Getting things in order (default is alphabetical)
# Total number of observations for each common_name
lizard_count <- summarise(group_by(lizards, common_name), count = n())

# Alternative: count() groups and tallies in a single call
lizard_count2 <- dplyr::count(lizards, common_name)

# Tally every common_name / tail combination
lizard_count_name_tail <- count(lizards, common_name, tail)
# Make common_name a factor whose levels are ordered by the value of another
# variable (`count`), so the bars are no longer in alphabetical order.
# `fct_reorder` = factor reorder; put a negative sign in front of the ordering
# variable (`-count`) to flip the order.
ggplot(data = lizard_count, aes(y = fct_reorder(common_name, count), x = count)) +
  geom_col(aes(fill = common_name), show.legend = FALSE) +
  # x holds the counts and y holds the species names, so label them accordingly
  labs(
    x = "Number of lizards",
    y = "Lizard common name"
  )
# common_name is on y and count on x to make it more readable; `coord_flip()` is an alternative
Try converting common name to an ordered factor outside of ggplot
# Reorder common_name by count before plotting, then inspect the level order
common_name_factor <- mutate(
  lizard_count,
  common_name = fct_reorder(common_name, count)
)
levels(common_name_factor[["common_name"]])
## [1] "texas horned" "eastern fence" "colorado checkered"
## [4] "lesser earless" "little striped" "western whiptail"
## [7] "side-blotched lizard"
Scatterplot: total_length (x) vs. weight (y)
lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  scale_x_continuous(
    limits = c(0, 500),
    breaks = c(0, 10, 50, 100, 500),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 70),
    breaks = seq(0, 70, by = 10),
    expand = c(0, 0)
  )
# `breaks` are the tick-mark positions; the vector can be typed out or built with seq()
# `limits` are the minimum and maximum values of the axis
Transform the date column to class Date, then find counts of observations by date
# Parse `date` with mdy() and tally the observations per date in one count()
# call, then copy the tally column `n` into `count`.
# If you attach `lubridate` up top you don't need the `lubridate::` prefix.
lizard_obs_count <- lizards %>%
  count(date = lubridate::mdy(date)) %>%
  mutate(count = n)
class(lizard_obs_count[["date"]])
## [1] "Date"
Make a line plot (geom_line()) of date (x) and count (y)
# In date formats, lowercase `%y` is a 2-digit year and uppercase `%Y` is a 4-digit year
lizard_obs_count %>%
  ggplot(mapping = aes(x = date, y = count)) +
  geom_line() +
  scale_x_date(
    date_labels = "%Y",
    date_breaks = "3 years"
  )
Color Scales
ggplot(lizards, aes(total_length, weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_gradient(high = "navy", low = "red")
# scale_color_gradient() is for 2 colors; scale_color_gradientn() is for multiple colors
lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_gradientn(
    colors = c("orange", "cyan", "chartreuse", "black")
  )
# Stepped color scale: `breaks` sets where one color changes to the next
ggplot(lizards, aes(total_length, weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_stepsn(
    breaks = c(0, 20, 40),
    colors = c("green", "blue", "purple")
  )
# if you give it more breaks than colors, it will create a gradient between colors
#### Update a color scheme using a palette in paletteer. Make a horizontal boxplot with common_name on the y-axis and total_length on the x-axis, with the fill color changing based on common_name
# Order the common_name levels by the median total_length of each species
lizards_fct <- mutate(
  lizards,
  common_name = fct_reorder(common_name, total_length, .fun = median)
)

ggplot(lizards_fct, aes(x = total_length, y = common_name)) +
  geom_boxplot(mapping = aes(fill = common_name), show.legend = FALSE) +
  scale_fill_paletteer_d(palette = "beyonce::X10") # palette = "package::palette"
`View(palettes_d_names)` lists the available palettes; note the package name and palette name. Themes
lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  theme(
    # panel: the area where the data is shown
    panel.background = element_rect(fill = "yellow", color = "purple", size = 10),
    # the most specific setting wins: .x and .y override panel.grid.major
    panel.grid.major.x = element_line(color = "red", size = 2),
    panel.grid.major = element_line(color = "blue"),
    panel.grid.major.y = element_line(color = "orange", size = 1),
    # plot: the larger background around the panel
    plot.background = element_rect(fill = "cyan4"),
    axis.text.x = element_text(color = "orange"),
    axis.title = element_text(color = "yellow", size = 12)
  )
# use `panel.grid = element_blank()` for no grid lines
ggrepel is great for adding labels to things
## Subset of lizards, called ww_lizards, that only contains observations of
## "western whiptail" lizards from the site "sand"
ww_lizards <- filter(
  lizards,
  common_name == "western whiptail",
  site == "sand"
)

# Scatter plot with each point labelled by its toe_num
ww_lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  geom_text_repel(mapping = aes(label = toe_num), max.overlaps = 20, size = 3)
# View(gapminder) to inspect the dataset
# Keep only the European rows for 2007, then create a scatter plot of
# gdpPercap (x) and lifeExp (y), labeled by country name
gapminder_sub <- filter(gapminder, continent == "Europe", year == 2007)

ggplot(gapminder_sub, aes(gdpPercap, lifeExp)) +
  geom_point() +
  geom_text_repel(mapping = aes(label = country), max.overlaps = 20, size = 3)
## gghighlight allows you to specify conditions where points or conditions will be highlighted from others
# Base scatter plot, stored so that the variations below can build on it
p <- lizards %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point()

p + theme_dark()

# Highlight only the points where toe_num is 250, labelled by toe_num
p + gghighlight(toe_num == 250, label_key = toe_num)
# One line per common_name; highlight the lines whose maximum weight exceeds 30
# NOTE(review): max() returns NA for a group that contains missing weights —
# confirm whether `na.rm = TRUE` is wanted here.
q <- ggplot(lizards, aes(total_length, weight)) +
  geom_line(mapping = aes(color = common_name)) +
  gghighlight(max(weight) > 30)
## label_key: common_name
q
## patchwork for compound figures the job of patchwork is to make it easier to apply common themes to multiple figures in a graphic
# `|` places the two plots side by side
p | q
# the operators follow R's usual precedence (think PEMDAS), so use parentheses to control the layout
# only works if the patchwork package is attached
# `/` stacks: the (p | q) row is placed above q
(p | q) / q
# `&` applies theme_minimal() to the plots in the composition
# NOTE(review): patchwork documents `*` for adding an element to every plot at the current nesting level — confirm that multiplying two plots is intended here
(p * q) * (p / q) &
theme_minimal()
## A few new graph types ### Marginal plots (from ggExtra)
# Western whiptail observations with both total_length and weight present
whiptails <- drop_na(
  filter(lizards, common_name == "western whiptail"),
  total_length, weight
)
# unique(whiptails$common_name)

# Scatter plot with a rug layer added
whiptails %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  geom_rug()
Marginal plot with boxplots
my_plot <- whiptails %>%
  ggplot(mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = sex), size = 2) +
  # NOTE(review): values and labels are unnamed, so they are matched to the
  # sex levels by position — confirm the level order
  scale_color_manual(
    name = "Sex:",
    labels = c("female", "juvenile", "male"),
    values = c("cyan4", "black", "goldenrod")
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Add boxplots in the margins, colored by group
ggMarginal(my_plot, groupColour = TRUE, type = "boxplot")
## ggMarginal also works with histograms and density plots
# Weight by sex: beeswarm points with an unfilled violin and boxplot on top
ggplot(whiptails, aes(sex, weight)) +
  geom_beeswarm() +
  geom_violin(fill = NA) +
  geom_boxplot(fill = NA)
### A heatmap with geom_tile()
# Rebuild lizard_count2 from lizards: parse date with mdy(), then count the
# number of lizards observed by year and common name, dropping rows with NA.
lizard_count2 <- lizards %>%
  count(year = lubridate::year(lubridate::mdy(date)), common_name) %>%
  drop_na()
# or use mutate(year = lubridate::year(date)) first
# or use group_by() and summarize(n())

ggplot(lizard_count2, aes(year, common_name)) +
  geom_tile(mapping = aes(fill = n), show.legend = FALSE) +
  geom_text(mapping = aes(label = n), size = 3, color = "white") +
  scale_fill_gradientn(colors = c("navy", "red", "orange")) +
  theme_minimal() +
  labs(y = "Lizard common name", x = "Year")
# gradientn allows you to pick as many colors as you want
# scale_fill_viridis_c() is color blind friendly
# Same heatmap, filled with the viridis continuous scale
lizard_count2 %>%
  ggplot(mapping = aes(x = year, y = common_name)) +
  geom_tile(mapping = aes(fill = n), show.legend = FALSE) +
  geom_text(mapping = aes(label = n), size = 3, color = "white") +
  scale_fill_viridis_c()
## Make a map! Use `read_sf()` to read in the “doc.kml” file
# type `here()` in the console to determine where you are
# Read the KML file, select the Name column, then clean the column names
jornada_vegetation <- here("data_raw", "spatial_vegetation", "doc.kml") %>%
  read_sf() %>%
  select(Name) %>%
  clean_names()

ggplot(jornada_vegetation) +
  geom_sf(mapping = aes(fill = name), color = NA) +
  scale_fill_paletteer_d(palette = "ggthemes::manyeys") +
  labs(
    fill = "Dominant vegetation:",
    y = "Latitude",
    x = "Longitude"
  )
# `scale_fill_paletteer_d()` because the names are discrete values rather than continuous
# Read the same KML file again and draw the map with theme_void()
jornada_vegetation <- here("data_raw", "spatial_vegetation", "doc.kml") %>%
  read_sf() %>%
  select(Name) %>%
  clean_names()

ggplot(jornada_vegetation) +
  geom_sf(mapping = aes(fill = name), color = NA) +
  scale_fill_paletteer_d(palette = "ggthemes::manyeys") +
  theme_void()
set this up at the top of the code chunk
library(palmerpenguins)

# Histogram of body_mass_g with one panel per species
penguins %>%
  ggplot(mapping = aes(x = body_mass_g)) +
  geom_histogram() +
  facet_wrap(facets = ~species)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).